/******************************************************************************
 * Copyright (c) KylinSoft  Co., Ltd. 2021. All rights reserved.
 * lcr licensed under the Mulan PSL v2.

 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 * Author: xiapin
 * Create: 2021-12-24
 * Description: provides the memory pool implementation.
 ******************************************************************************/
#include "obmp.h"
#include <string.h>
#include <unistd.h>
#include <malloc.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include "mem_block.h"
#include "obmp_type.h"

/* Expands to ALIGN(size, 8); ALIGN is defined outside this file (presumably rounds up - confirm). */
#define ALIGNED_TO_8(size)      ALIGN(size, 8)
/* Shorthand for locking / unlocking a pthread mutex; m is passed straight to the pthread call. */
#define OBMP_LOCK(m)            pthread_mutex_lock(m)
#define OBMP_UNLOCK(m)          pthread_mutex_unlock(m)

/* Thread-specific-data key holding each thread's thread_cache_t pointer; created in obmp_init(). */
static pthread_key_t g_thread_key;
/*
 * Once-control used with pthread_once() to run obmp_init() a single time.
 * POSIX leaves pthread_once() undefined unless the control is initialised with PTHREAD_ONCE_INIT.
 */
static pthread_once_t g_thread_once = PTHREAD_ONCE_INIT;
/* The process-wide memory pool; assigned in obmp_init(), NULL before that. */
static mem_pool_t *g_mem_pool;

/*
 * Indices for the "big block" levels. In this file they are referenced only by the
 * commented-out obmp_thread_cache_shrink() code.
 * NOTE(review): "LEVER" looks like a misspelling of "LEVEL"; the identifiers are left as-is.
 */
enum {
    BIG_BLK_LEVEL_L     = 0,
    BIG_BLK_LEVER_H,

    /* Number of levels defined above. */
    BIG_BLK_LEVER_NR,
};

/*
 * Forward declaration; the definition is further down in this file. The only call site
 * visible here is the commented-out pthread_create() in obmp_init().
 */
static void *obmp_gc_thread(void *arg);

/**
 * @description: Allocate a zero-filled thread cache, then initialise its mutex and its
 *               (separately allocated) chunk list head.
 * @param {U32} block_cnt  Not used by this function; kept so the signature stays unchanged.
 * @return {thread_cache_t *} The newly created thread cache.
 */
static thread_cache_t *obmp_thread_cache_init(U32 block_cnt)
{
    (void)block_cnt;

    thread_cache_t *thread_cache = clib_calloc(1, sizeof(thread_cache_t));
    MP_ASSERT(thread_cache != NULL);

    thread_cache->is_main_area = 0;
    pthread_mutex_init(&thread_cache->t_mutex, NULL);

    thread_cache->chunk_list = clib_calloc(1, sizeof(struct list_head));
    /* Check this allocation the same way as the cache itself before touching the list head. */
    MP_ASSERT(thread_cache->chunk_list != NULL);
    INIT_LIST_HEAD(thread_cache->chunk_list);

    return thread_cache;
}

/**
 * @description: Create the global memory pool object: a zeroed mem_pool_t, its mutex, and an
 *               array of thread_count thread caches, each built by obmp_thread_cache_init().
 * @param {U32} thread_count  Number of thread caches to create.
 * @param {U32} block_cnt     Forwarded unchanged to obmp_thread_cache_init().
 * @return {mem_pool_t *} The initialised pool.
 */
static mem_pool_t *obmp_global_init(U32 thread_count, U32 block_cnt)
{
    U32 i = 0;

    /* calloc gives the zero-filled object the original obtained with malloc + memset. */
    mem_pool_t *mp = (mem_pool_t *)clib_calloc(1, sizeof(mem_pool_t));
    MP_ASSERT(mp != NULL);

    pthread_mutex_init(&mp->m_mutex, NULL);
    mp->used_caches_cnt = 0;
    mp->thread_cache_cnt = thread_count;

    /* calloc also guards the count * element-size multiplication against overflow. */
    mp->thread_caches = (thread_cache_t **)clib_calloc(thread_count, sizeof(*mp->thread_caches));
    MP_ASSERT(mp->thread_caches != NULL);

    for (i = 0; i < thread_count; i++) {
        (mp->thread_caches)[i] = obmp_thread_cache_init(block_cnt);
    }

    return mp;
}

/**
 * @description: Destructor registered with the thread-specific-data key. Hands a thread's
 *               cache back to the pool; a cache flagged as the main area is left untouched.
 * @param {thread_cache_t} *thrd_cache  Cache that was stored under g_thread_key.
 * @return {*}
 */
static void on_thrd_cache_recycle(thread_cache_t *thrd_cache)
{
    MP_ASSERT(thrd_cache != NULL);

    if (thrd_cache->is_main_area != 1) {
        /* Mark the cache free and drop its mutex so another thread's trylock can claim it. */
        thrd_cache->is_used = 0;
        pthread_mutex_unlock(&thrd_cache->t_mutex);

        /* The pool-wide usage counter is only modified under the pool mutex. */
        pthread_mutex_lock(&g_mem_pool->m_mutex);
        g_mem_pool->used_caches_cnt -= 1;
        pthread_mutex_unlock(&g_mem_pool->m_mutex);
    }
}

/**
 * @description: Each process call once, for global initialization.
 * @param {*}
 * @return {*}
 */
static void obmp_init(void)
{
    // pthread_t gc;

    mallopt(M_ARENA_MAX, sysconf(_SC_NPROCESSORS_CONF));
    pthread_key_create(&g_thread_key, (void (*)(void *))on_thrd_cache_recycle);
    g_mem_pool = obmp_global_init(SUPPORT_THREAD_MAX, MAX_BLOCK_COUNT);

    // pthread_create(&gc, NULL, obmp_gc_thread, NULL);
}

/**
 * @description: Obtain memory from a thread cache. Requests of MMAP_THRES_HOLD bytes or more
 *               go to get_mem_from_clib(); smaller ones are mapped to a slot in the high, mid
 *               or low block table, whose descriptor is created on first use.
 * @param {thread_cache_t} *thrd_cache  Cache owned by the calling thread.
 * @param {size_t} size  Requested size. Must be non-zero: for 0 the index computation would
 *                       wrap around (obmp_malloc() filters that case before calling).
 * @return {*} Pointer from get_mem_from_clib()/get_mem_from_block(), or NULL when the block
 *             descriptor cannot be allocated.
 */
static void *obmp_get_mem_from_thread_cache(thread_cache_t *thrd_cache, size_t size)
{
    mem_block_info_t **mem_block = NULL;
    U32 block_index = 0;
    size_t align_size = 0;

    MP_ASSERT(thrd_cache != NULL);

    if (size >= MMAP_THRES_HOLD) {
        // TODO: use mmap
        return get_mem_from_clib(size);
    } else if (size > SLICE_HIGH_THRES) {
        /* An exact multiple of the slice belongs to the previous slot, hence the "- !(...)". */
        block_index = (size >> SLICE_HIGH_SHIFT) - !(size & SLICE_MASK(SLICE_HIGH));
        mem_block = &thrd_cache->block_high[block_index];
        align_size = (block_index + 1) << SLICE_HIGH_SHIFT;
    } else if (size > SLICE_MID_THRES) {
        /* Mid slots are indexed relative to SLICE_MID_THRES. */
        size -= SLICE_MID_THRES;
        block_index = (size >> SLICE_MID_SHIFT) - !(size & SLICE_MASK(SLICE_MID));
        mem_block = &thrd_cache->block_mid[block_index];
        align_size = SLICE_MID_THRES + ((block_index + 1) << SLICE_MID_SHIFT);
    } else {
        /* size is unsigned, so everything that is left is the low range. */
        block_index = (size >> SLICE_LOW_SHIFT) - !(size & SLICE_MASK(SLICE_LOW));
        mem_block = &thrd_cache->block_low[block_index];
        align_size = (block_index + 1) << SLICE_LOW_SHIFT;
    }

    if (unlikely(*mem_block == NULL)) {
        *mem_block = (mem_block_info_t *)clib_calloc(1, sizeof(mem_block_info_t));
        if (unlikely(*mem_block == NULL)) {
            /* Out of memory: report failure instead of passing NULL to get_mem_from_block(). */
            return NULL;
        }
    }

    return get_mem_from_block(thrd_cache, *mem_block, align_size);
}

/**
 * @description: Claim a free thread cache from the pool for the calling thread. The first
 *               cache whose mutex can be taken with trylock is marked used and returned with
 *               that mutex still held.
 * @param {mem_pool_t} *mp  Pool to search.
 * @return {*} The claimed thread_cache_t pointer, or NULL when no cache is available.
 */
static thread_cache_t *obmp_get_thread_cache(mem_pool_t *mp)
{
    U32 slot;

    /* Fast exit when the counter says every cache is already taken. */
    if (mp->used_caches_cnt >= mp->thread_cache_cnt) {
        return NULL;
    }

    for (slot = 0; slot < mp->thread_cache_cnt; slot++) {
        thread_cache_t *candidate = mp->thread_caches[slot];

        if (pthread_mutex_trylock(&candidate->t_mutex) != 0) {
            /* Could not take the mutex; try the next cache. */
            continue;
        }

#ifdef OBMP_DEBUG
        candidate->tid = syscall(SYS_gettid);
#endif
        candidate->is_used = 1;
        /* Slot 0 is flagged as the main area. */
        candidate->is_main_area = (slot == 0);

        pthread_mutex_lock(&mp->m_mutex);
        mp->used_caches_cnt++;
        pthread_mutex_unlock(&mp->m_mutex);

        return candidate;
    }

    return NULL;
}

/**
 * @description: Return a pool-owned memory unit to the calling thread's cache by pushing it
 *               onto the free list of the block that matches the unit's size.
 * @param {mem_unit_t} *mu  Unit header of the memory being released.
 * @return {*} Nothing. Aborts the process when the unit size is 0 or the calling thread's
 *             cache has no block for that size.
 */
static void free_mem_to_tcache(mem_unit_t *mu)
{
    size_t block_index;
    mem_block_info_t *mem_block;
    size_t size;

    thread_cache_t *thrd_cache = (thread_cache_t *)pthread_getspecific(g_thread_key);
    /* The original asserted on the type name thread_cache_t instead of this variable. */
    MP_ASSERT(thrd_cache != NULL);

    size = mu->info.unit_size;
    if (size == 0) {
        /* A zero size would make the index computation below wrap around. */
        fprintf(stderr, "invalid memory unit size:%zu\n", size);
        abort();
    }

    if (size > SLICE_HIGH_THRES && size <= MMAP_THRES_HOLD) {
        block_index = (size >> SLICE_HIGH_SHIFT) - !(size & SLICE_MASK(SLICE_HIGH));
        mem_block = thrd_cache->block_high[block_index];
    } else if (size > SLICE_MID_THRES) {
        /* Mid slots are indexed relative to SLICE_MID_THRES. */
        size -= SLICE_MID_THRES;
        block_index = (size >> SLICE_MID_SHIFT) - !(size & SLICE_MASK(SLICE_MID));
        mem_block = thrd_cache->block_mid[block_index];
    } else {
        block_index = (size >> SLICE_LOW_SHIFT) - !(size & SLICE_MASK(SLICE_LOW));
        mem_block = thrd_cache->block_low[block_index];
    }

    if (mem_block == NULL) {
        /* Block descriptors are created lazily on allocation; this cache never made one. */
        fprintf(stderr, "no memory block for unit size:%zu\n", (size_t)mu->info.unit_size);
        abort();
    }

    /* Push the unit on the front of the block's free list. */
    mu->next = mem_block->free_list;
    mem_block->free_list = mu;
}

/**
 * @description: Release a pointer that came either from the C library or from the memory pool.
 * @param {void} *ptr  Pointer to release; NULL is ignored.
 * @return {*}
 */
void obmp_free(void *ptr)
{
    mem_unit_t *header;

    if (unlikely(ptr == NULL)) {
        return;
    }

    header = ptr_to_mem_unit(ptr);

    if (!MU_IS_FROM_OBMP(header)) {
        /* Not a pool pointer: free it immediately through the C library. */
        clib_free(ptr);
        return;
    }

    if (unit_belongs_to_tcache_placeholder_guard(header)) {
        free_mem_to_tcache(header);
        return;
    }

    clib_free(header);
}

/**
 * @description: Allocate memory from memory pool.
 * @param {size_t} size
 * @return {*} A pointer to the allocated memory, On error, return NULL
 */
void *obmp_malloc(size_t size)
{
    pthread_once(&g_thread_once, obmp_init);

    if (unlikely(size == 0)) {
        return clib_malloc(0);
    }

    thread_cache_t *thread_cache = (thread_cache_t *)pthread_getspecific(g_thread_key);
    if (unlikely(thread_cache == NULL)) {
        thread_cache = obmp_get_thread_cache(g_mem_pool);
        if (thread_cache == NULL) { // TODO: expand tcache count
            return get_mem_from_clib(size);
        }
        pthread_setspecific(g_thread_key, thread_cache);
    }

    return obmp_get_mem_from_thread_cache(thread_cache, size);
}

/**
 * @description: Resize an allocation. Pointers that did not come from the pool are handed to
 *               the C library realloc; pool pointers are kept when the existing unit is
 *               already large enough, otherwise a new unit is allocated, the old contents are
 *               copied and the old unit is released.
 * @param {void} *prev  Existing allocation, or NULL (behaves like obmp_malloc).
 * @param {size_t} size New size in bytes; 0 with a non-NULL prev frees prev and returns NULL.
 * @return {*} A pointer to the resized memory. On error, return NULL and leave prev valid.
 */
static void *obmp_realloc(void *prev, size_t size)
{
    pthread_once(&g_thread_once, obmp_init);
    if (prev == NULL) { /* The same as malloc if prev is null */
        return obmp_malloc(size);
    }

    if (size == 0) { /* The same as free if size is zero */
        obmp_free(prev);
        return NULL;
    }

    /*
     * Use libc realloc if prev pointer is not from obmp. The original test was inverted: it
     * sent pool pointers to libc and read a unit header in front of foreign pointers.
     */
    mem_unit_t *unit_info = ptr_to_mem_unit(prev);
    if (!MU_IS_FROM_OBMP(unit_info)) {
        return clib_realloc(prev, size);
    }

    size_t old_size = unit_info->info.unit_size;
    if (size <= old_size) {
        /* The current unit already has room for the requested size. */
        return prev;
    }

    void *new_ptr = obmp_malloc(size);
    if (new_ptr == NULL) {
        /* As with realloc(), the original block stays untouched on failure. */
        return NULL;
    }
    /* size > old_size here, so the whole old unit is what has to be preserved. */
    memcpy(new_ptr, prev, old_size);

    if (unit_info->info.belongs == 0) {
        clib_free(unit_info);
    } else {
        free_mem_to_tcache(unit_info);
    }

    return new_ptr;
}

/**
 * @description: Report the usable size of an allocation. Pool pointers report the unit size
 *               stored in their header; every other pointer is passed to the C library.
 * @param {void} *ptr  A pointer to allocated memory, or NULL.
 * @return {size_t} Usable size in bytes; 0 when ptr is NULL.
 */
static size_t obmp_malloc_usable_size(void *ptr)
{
    /* malloc_usable_size(NULL) is 0; never look for a unit header in front of NULL. */
    if (ptr == NULL) {
        return 0;
    }

    mem_unit_t *unit_info = ptr_to_mem_unit(ptr);

    if (!MU_IS_FROM_OBMP(unit_info)) {
        return clib_malloc_usable_size(ptr);
    }

    return unit_info->info.unit_size;
}

// /**
//  * @description: Release an idle tcache memory back to system.
//  * @param {thread_cache_t} *thread_cache
//  * @return {*}
//  */
// static void obmp_thread_cache_shrink(thread_cache_t *thread_cache)
// {
//     S32 i;
//     block_stats_t blk_stat;

//     for (i = 0; i < BIG_BLK_LEVER_NR; i++) {
//         get_block_status(thread_cache->chunk->buffer, thread_cache->big_blocks[i], &blk_stat);
//         if (blk_stat.used != 0) {
//             return;
//         }
//     }

//     for (i = 0; i < thread_cache->block_cnt; i++) {
//         get_block_status(thread_cache->chunk->buffer, thread_cache->mem_blocks[i], &blk_stat);
//         if (blk_stat.used != 0) {
//             /* If some block is in used, cannot free all cache. */
//             return;
//         }
//     }

//     // clear all blocks
//     for (i = 0; i < BIG_BLK_LEVER_NR; i++) {
//         thread_cache->big_blocks[i]->mem_units = NULL;
//     }

//     for (i = 0; i < thread_cache->block_cnt; i++) {
//         thread_cache->mem_blocks[i]->mem_units = NULL;
//     }
//     release_tcache_chunk(&thread_cache->chunk);
// }

// /**
//  * @description: Scan memory pool and release idle tcache memory occupy.
//  * @param {mem_pool_t} *mem_pool
//  * @return {*}
//  */
// void obmp_shrink(mem_pool_t *mem_pool)
// {
// #define MEM_POOL_LOCK           OBMP_LOCK(&mem_pool->m_mutex)
// #define MEM_POOL_UNLOCK         OBMP_UNLOCK(&mem_pool->m_mutex)
//     U32 i = 0;

//     for (i = 0; i < mem_pool->thread_cache_cnt; i++) {
//         if (mem_pool->thread_caches[i]->chunk == NULL) {
//             continue;
//         }

//         if (pthread_mutex_trylock(&mem_pool->thread_caches[i]->t_mutex)) {
//             continue;
//         }

//         MEM_POOL_LOCK;
//         mem_pool->used_caches_cnt++;
//         MEM_POOL_UNLOCK;

//         obmp_thread_cache_shrink(mem_pool->thread_caches[i]);
//         OBMP_UNLOCK(&mem_pool->thread_caches[i]->t_mutex);

//         MEM_POOL_LOCK;
//         mem_pool->used_caches_cnt--;
//         MEM_POOL_UNLOCK;
//     }
// }

/**
 * @description: Memory global option set. Both arguments are currently ignored: the call
 *               releases the calling thread's own cache (if it holds one) and binds the thread
 *               to the pool's first thread cache instead.
 * @param {MP_OPT} opt  Ignored.
 * @param {void} *value Ignored.
 * @return {S32} Always 0.
 */
S32 obmp_set(MP_OPT opt, void *value)
{
    OBMP_UNUSED_VAR(opt);
    OBMP_UNUSED_VAR(value);

    /* g_mem_pool and g_thread_key only exist after the one-time initialisation. */
    pthread_once(&g_thread_once, obmp_init);

    thread_cache_t *main_cache = g_mem_pool->thread_caches[0];
    thread_cache_t *cache = pthread_getspecific(g_thread_key);

    if (cache == main_cache) {
        /*
         * Already bound to the first cache. Releasing it again would unlock its mutex and
         * decrement the usage counter a second time.
         */
        return 0;
    }

    if (cache != NULL) {
        /* Same release sequence as on_thrd_cache_recycle(). */
        cache->is_used = 0;
        OBMP_UNLOCK(&cache->t_mutex);

        OBMP_LOCK(&g_mem_pool->m_mutex);
        g_mem_pool->used_caches_cnt--;
        OBMP_UNLOCK(&g_mem_pool->m_mutex);
    }

    pthread_setspecific(g_thread_key, main_cache);

    return 0;
}

/**
 * @description: Print the allocated/used figures of every block in a block table to stderr.
 *               Entries for which get_block_status() returns non-zero are skipped.
 * @param {mem_block_info_t} **mem_block  Table of block descriptors.
 * @param {U32} block_cnt  Number of entries in the table.
 * @return {*}
 */
static void print_blocks_state(mem_block_info_t **mem_block, U32 block_cnt)
{
    U32 i = 0;
    size_t alloced, used;

    for (i = 0; i < block_cnt; i++) {
        if (get_block_status(mem_block[i], &alloced, &used)) {
            continue;
        }

        /* %u matches the unsigned index, %zu matches size_t. */
        fprintf(stderr, "block:%u alloced:%zu use:%zu\n", i, alloced, used);
    }
}

/**
 * @description: For debug, print each thread cache memory usage state to stderr.
 * @param {*}
 * @return {*}
 */
void obmp_print_cache_state(void)
{
#define DBG_CACHE   (g_mem_pool->thread_caches[i])
    U32 i;

    for (i = 0; i < g_mem_pool->thread_cache_cnt; i++) {
        fprintf(stderr, "Cache:%u used:%u\n", i, DBG_CACHE->is_used);
        if (DBG_CACHE->is_used == 0) {
            continue;
        }

        fprintf(stderr, "High lever block\n");
        print_blocks_state(DBG_CACHE->block_high, sizeof(DBG_CACHE->block_high)/sizeof(void *));

        fprintf(stderr, "Mid lever block\n");
        print_blocks_state(DBG_CACHE->block_mid, sizeof(DBG_CACHE->block_mid)/sizeof(void *));

        fprintf(stderr, "Low lever block\n");
        print_blocks_state(DBG_CACHE->block_low, sizeof(DBG_CACHE->block_low)/sizeof(void *));
    }
}

/**
 * @description: Memory recycle thread body. Detaches itself, names the thread "mempool_gc"
 *               and polls the pool's used-cache counter for as long as g_mem_pool is set.
 *               The actual shrink step is commented out, so the loop only keeps counters.
 * @param {void} *arg  Unused.
 * @return {*} Always NULL.
 */
static void *obmp_gc_thread(void *arg)
{
    U16 stable_rounds = 0;
    U16 prev_used_cnt = 0;

    OBMP_UNUSED_VAR(arg);
    pthread_detach(pthread_self());
    prctl(PR_SET_NAME, "mempool_gc");

    while (g_mem_pool) {
        if (prev_used_cnt == g_mem_pool->used_caches_cnt) {
            /* Usage unchanged since the last poll: count the round if at most half are used. */
            if (g_mem_pool->used_caches_cnt > (g_mem_pool->thread_cache_cnt >> 1)) {
                stable_rounds = 0;
            } else {
                stable_rounds++;
            }

#ifdef OBMP_DEBUG
            struct mallinfo info = mallinfo();
            fprintf(stderr, "library malloced:%d\n", info.uordblks);

            // malloc_stats();
#endif
            /* Detect every 15 seconds, if thread cache is stable, try to release idle memory chunk */
            // if (count == 3) {
            //     malloc_trim(0);
            //     obmp_shrink(g_mem_pool);
            //     count = 0;
            // }

            sleep(5);
        } else {
            /* Usage changed: remember the new value, restart the count and back off longer. */
            prev_used_cnt = g_mem_pool->used_caches_cnt;
            stable_rounds = 0;
            sleep(10);
        }
    }

    return NULL;
}

/**
 * @description: Allocate zero-filled memory for an array through the memory pool.
 * @param {size_t} nmemb  Number of elements.
 * @param {size_t} size   Size of one element in bytes.
 * @return {*} A pointer to the zeroed memory, or NULL when nmemb * size overflows size_t or
 *             the allocation fails.
 */
void *obmp_calloc(size_t nmemb, size_t size)
{
    /* (size_t)-1 is the largest size_t; reject products that cannot be represented. */
    if (size != 0 && nmemb > (size_t)-1 / size) {
        return NULL;
    }

    size_t total = nmemb * size;
    void *p = obmp_malloc(total);
    if (p != NULL) {
        memset(p, 0, total);
    }

    return p;
}

/**
 * @description: Duplicate a NUL-terminated string into memory obtained from obmp_malloc().
 * @param {const char} *s  String to copy; must not be NULL.
 * @return {*} A pointer to the copy (including its terminator), or NULL if allocation fails.
 */
void *obmp_strdup(const char *s)
{
    /* size_t, not U32: the length must not be truncated for very long strings. */
    size_t len = strlen(s) + 1;
    void *p = obmp_malloc(len);

    if (p != NULL) {
        /* len already counts the terminator, so a plain byte copy is enough. */
        memcpy(p, s, len);
    }

    return p;
}

/**
 * @description: Duplicate at most n bytes of a string into memory obtained from
 *               obmp_malloc(). The result is always NUL-terminated.
 * @param {const char} *s  Source string; must not be NULL.
 * @param {size_t} n  Maximum number of bytes to copy from s.
 * @return {*} A pointer to the copy, or NULL if allocation fails.
 */
void *obmp_strndup(const char *s, size_t n)
{
    /* Length of s limited to n, without reading past the first terminator. */
    const char *terminator = memchr(s, '\0', n);
    size_t len = (terminator != NULL) ? (size_t)(terminator - s) : n;

    /* One extra byte for the terminator, which the original never reserved or wrote. */
    char *p = obmp_malloc(len + 1);
    if (p != NULL) {
        memcpy(p, s, len);
        p[len] = '\0';
    }

    return p;
}


/**
 * @description: Hook for the free function: defines the global symbol free and forwards every
 *               call to obmp_free(). Per the original note, this is needed because some
 *               pointers are probably freed by a third party library.
 * @param {void} *p  Pointer to release; passed to obmp_free() unchanged.
 * @return {*}
 */
void free(void *p)
{
    obmp_free(p);
}

/**
 * @description: Hook for the realloc function: defines the global symbol realloc and forwards
 *               every call to obmp_realloc().
 * @param {void} *ptr   Existing allocation or NULL; passed through unchanged.
 * @param {size_t} size Requested new size; passed through unchanged.
 * @return {*} Whatever obmp_realloc() returns.
 */
void *realloc(void *ptr, size_t size)
{
    return obmp_realloc(ptr, size);
}

/**
 * @description: Hook for the malloc_usable_size function: defines the global symbol
 *               malloc_usable_size and forwards every call to obmp_malloc_usable_size().
 * @param {void} *ptr  Pointer to query; passed through unchanged.
 * @return {size_t} Whatever obmp_malloc_usable_size() returns.
 */
size_t malloc_usable_size(void *ptr)
{
    return obmp_malloc_usable_size(ptr);
}
